Before we dive into the scenarios, let's conduct a simple comparison of the overall trends of RCP 4.5 and RCP 8.5. Which variables influence annual temperature the most when the data are grouped into the two RCP scenarios as a whole?
Methodology
We will use scatterplots, Pearson correlation, and RMSE to find similarities and differences between the two RCP scenarios.
Import module / Set options and theme
# Notebook setup: every import used by the analysis, listed once each,
# followed by the global warning and pandas display options.
import warnings
import xml.etree.ElementTree as ET

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pingouin as pg
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.subplots import make_subplots
from scipy.stats import ttest_rel
from scipy.stats import zscore
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.weightstats import ttest_ind

# Silence all warnings for the rest of the notebook.
warnings.filterwarnings("ignore")

# Show every column and print floats with 10 digits of precision.
pd.set_option('display.max_columns', None)
pd.set_option('display.precision', 10)
Feature Selection
When looking for variables that correlate with annual temperature, the temperature-related features such as Tmin, Tmax, and T_Seasonal always turned out to be highly correlated with it and therefore acted as a hindrance to proper analysis. I removed all temperature-related variables (other than annual temperature itself) to clearly investigate which properties affect the temperature.
Import cleaned data
def _split_by_rcp(frame):
    """Split ``frame`` into the four pieces used to build a scenario dataset.

    Returns ``(four_h, four, eight_h, eight)`` where ``four`` / ``eight`` are
    the rows whose ``RCP`` is 4.5 / 8.5, and ``four_h`` / ``eight_h`` are two
    independent copies of the rows whose ``RCP`` is ``'historical'``,
    relabelled to 4.5 and 8.5 respectively.
    """
    historical = frame[frame['RCP'].isin(['historical'])]
    # ``assign`` returns a new frame, so the relabelling never writes into a
    # slice of ``frame`` (the original did ``slice['RCP'] = ...``).
    four_h = historical.assign(RCP=4.5)
    eight_h = historical.assign(RCP=8.5)
    four = frame[frame['RCP'].isin([4.5])]
    eight = frame[frame['RCP'].isin([8.5])]
    return four_h, four, eight_h, eight


df = pd.read_csv('../data/cleaned_df.csv')
# One integer id (starting at 1) per distinct (long, lat) pair.
df['Location_ID'] = df.groupby(['long', 'lat']).ngroup() + 1

group_list = [
    'Park', 'long', 'lat', 'veg', 'year', 'TimePeriod', 'RCP',
    'treecanopy', 'Ann_Herb', 'Bare', 'Herb', 'Litter', 'Shrub', 'El', 'Sa',
    'Cl', 'RF', 'Slope', 'E', 'S',
]
# Average every remaining column over the rows that share the same
# ``group_list`` values, after dropping the ``scenario`` column.
veg_location = (
    df.drop(labels='scenario', axis=1)
    .groupby(group_list)
    .mean()
    .reset_index()
)
# Optional min-max normalisation of T_Annual (disabled):
# veg_location['T_Annual'] = (veg_location['T_Annual'] - veg_location['T_Annual'].min()) / (veg_location['T_Annual'].max() - veg_location['T_Annual'].min())

# --- Average Scenario Dataset ---
# Convert RCP to numeric where possible; values that cannot be parsed keep
# their original (non-numeric) value.
numeric_series = pd.to_numeric(veg_location['RCP'], errors='coerce')
veg_location['RCP'] = numeric_series.fillna(veg_location['RCP'])

four_h, four, eight_h, eight = _split_by_rcp(veg_location)
df_con = pd.concat([four_h, four, eight_h, eight], ignore_index=True)
df_con['Location_ID'] = df_con.groupby(['long', 'lat']).ngroup() + 1

# --- Scenario Dataset ---
# Same RCP conversion and split, applied to the un-averaged data.
numeric_series = pd.to_numeric(df['RCP'], errors='coerce')
df['RCP'] = numeric_series.fillna(df['RCP'])

four_h, four, eight_h, eight = _split_by_rcp(df)
df_orig = pd.concat([four_h, four, eight_h, eight], ignore_index=True)
df_orig['Location_ID'] = df_orig.groupby(['long', 'lat']).ngroup() + 1

# Separate the temperature-derived columns from everything else.
temperature_prefixes = ('T_', 'Tmin', 'Tmax')
selected_columns = [
    col for col in df.columns if not col.startswith(temperature_prefixes)
]
dropped_columns = [
    col for col in df.columns if col.startswith(temperature_prefixes)
]

# Take an explicit copy so adding T_Annual back does not write into a slice
# of ``df_orig``.
filtered_df = df_orig[selected_columns].copy()
filtered_df['T_Annual'] = df_orig['T_Annual']
df_orig = filtered_df

print("Dropped Columns : ", dropped_columns)
With a basic scatterplot, we can see how each numerical variable correlates with either the annual temperature or the annual precipitation. Since RCP 8.5 and RCP 4.5 have different predictions, a separate plot is used for each scenario.
First, without any additional feature, we can see that the more precipitation there is, the lower the annual temperature tends to be: a line with a negative slope can easily be drawn through the scattered points.
4.5 vs 8.5 scatterplot
# Side-by-side scatterplots (one panel per RCP value) of annual precipitation
# against annual temperature. A dropdown selects which column colours the
# markers. Expects ``df_con`` to be loaded already.

# Columns offered for colouring, picked by position in ``df_con``:
# 1-2, 4, 6, and 8 up to (but not including) the last column.
test = df_con.iloc[
    :, list(range(1, 3)) + [4, 6] + list(range(8, len(df_con.columns) - 1))
]
color_columns = list(test.columns)
rcp_values = test['RCP'].unique()
subplot_titles = [f'RCP {rcp}' for rcp in rcp_values]

# Only years 2060-2099 are plotted. Build each RCP's subset once instead of
# re-filtering ``test`` for every attribute of every trace.
late_years = test[test['year'].isin(range(2060, 2100))]
rcp_subsets = [late_years[late_years['RCP'] == rcp] for rcp in rcp_values]

# Create figure with one subplot per RCP value
fig = make_subplots(
    rows=1,
    cols=len(rcp_values),
    shared_yaxes=True,
    subplot_titles=subplot_titles,
    horizontal_spacing=0.15,
)

# Add one scatter trace per (colour column, RCP value). The trace order
# (column outer, RCP inner) must match the 'visible' masks built below.
for i, col in enumerate(color_columns):
    for j, (rcp, subset) in enumerate(zip(rcp_values, rcp_subsets)):
        values = subset[col]
        # Five evenly spaced ticks between the rounded min and max.
        # Series.min()/max() skip NaN, whereas the built-in min()/max() give
        # an order-dependent result when NaN is present.
        ticks = [
            round(tick, 2)
            for tick in np.linspace(
                start=round(values.min(), 2),
                stop=round(values.max(), 2),
                num=5,
            )
        ]
        fig.add_trace(
            go.Scatter(
                x=subset['PPT_Annual'],
                y=subset['T_Annual'],
                mode='markers',
                marker=dict(
                    color=values,
                    colorbar=dict(
                        # title='Scale',
                        tickmode='array',
                        tickvals=ticks,
                        ticktext=ticks,
                        y=0.5,
                        # NOTE(review): these offsets look tuned for two
                        # side-by-side panels - confirm if more are added.
                        x=0.43 + (j * 0.58),
                    ),
                    colorscale='rdpu',
                ),
                name=col,
                # Only the traces of the first colour column start visible.
                visible=(i == 0),
                hovertemplate=(
                    f"<b>{col}</b><br>"
                    "Precipitation: %{x}<br>"
                    "Temperature: %{y}<br>"
                    "RCP: " + str(rcp) + "<br>"
                    "Value: %{marker.color}<br>"
                    "<extra></extra>"  # Hides the secondary box with trace info
                ),
            ),
            row=1,
            col=j + 1,
        )

# Updating the layout to add the title
fig.update_layout(
    title={
        'text': '<b>Annual Precipitation vs Temperature by RCP Scenarios</b>',
        'x': 0.5,
        'y': 0.97,
        'xanchor': 'center',
    },
    # title_font=dict(size=20),
    showlegend=False,  # Hide legend since we are using colorbars
)

# Dropdown that switches which colour column's traces are visible.
dropdown_buttons = [
    {
        'label': col,
        'method': 'update',
        'args': [
            {
                'visible': [
                    col == color_column
                    for color_column in color_columns
                    for _ in rcp_values
                ]
            },
            {
                'title': {
                    'text': f'<b>Annual Precipitation vs Temperature by {col}</b>',
                    'x': 0.5,
                    'y': 0.97,
                },
                # NOTE(review): this dict is sent as a layout update;
                # 'marker' may have no effect there - confirm.
                'marker': {'colorbar': {'title': 'Scale'}},
            },
        ],
    }
    for col in color_columns
]

fig.update_layout(
    updatemenus=[
        {
            'buttons': dropdown_buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0.5,
            'xanchor': 'center',
            'y': 1.19,
            'yanchor': 'top',
        }
    ]
)

# Label the x axis of every panel (previously hard-coded for columns 1 and 2)
# and the y axis of the first panel.
for panel in range(1, len(rcp_values) + 1):
    fig.update_xaxes(title_text="Annual Precipitation", row=1, col=panel)
fig.update_yaxes(title_text="Annual Temperature", row=1, col=1)

# Restyle the subplot titles.
for annotation in fig['layout']['annotations']:
    annotation['font'] = {'size': 12, 'color': 'black'}

# Show the figure
fig.show()